package com.free4inno.kmstika.service;

import com.free4inno.kmstika.dao.AttachmentDao;
import com.free4inno.kmstika.domain.Attachment;
import com.free4inno.kmstika.utils.TikaStringUtils;
import com.free4inno.kmstika.utils.TikaUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Author HUYUZHU.
 * Date 2021/3/26 13:30.
 */


@Slf4j
@Service
public class AttachmentTikaService {

    @Autowired
    AttachmentDao attachmentDao;

    @Value("${attatchment.download.url}")
    private String downloadUrl;

    public String parseAll() {
//尝试复现一下tika的使用
//        try {
//            File file = new File("/Users/liuxinyuan/Desktop/bishe.pdf");
//            Tika tika = new Tika();
//            String filecontent = tika.parseToString(file);
//            System.out.println("Extracted Content: " + filecontent);
//            return filecontent;
//        } catch (Exception e) {
//            e.printStackTrace();
//        }


        List<Attachment.AttachmentEnum> statusList = new ArrayList<>();
        //设置需要解析的attachment的类别
        statusList.add(Attachment.AttachmentEnum.READY);
        statusList.add(Attachment.AttachmentEnum.LOADING);
        statusList.add(Attachment.AttachmentEnum.FAILED);
        List<Attachment> attachmentList = attachmentDao.findAllByStatusIn(statusList);
        attachmentList.forEach(attachment -> {
            String text = TikaUtils.parseFile(downloadUrl+attachment.getUrl()); //暂时用这种方法实现一下
            attachment.setOverTime(new Timestamp(new Date().getTime()));
            switch (text) {
                case "Error parsing file":
                case "Error reading file": {
                    attachment.setText(text);
                    attachment.setStatus(Attachment.AttachmentEnum.FAILED);
                    // log.info(attachment.getName() + " " + text);
                    break;
                }
                default: {
                    //清洗无效字符后存入数据库
                    attachment.setText(TikaStringUtils.cleanString(text));
                    attachment.setStatus(Attachment.AttachmentEnum.SUCCESS);
                    log.info(attachment.getName() + " Success parsing file");
                }
            }
            try {
                attachmentDao.saveAndFlush(attachment);
            } catch (Exception e) {
                e.printStackTrace();
                attachment.setText("Error save text");
                attachment.setStatus(Attachment.AttachmentEnum.FAILED);
                attachmentDao.saveAndFlush(attachment);
            }
        });
        // log.info("OverParseAll");
        return "OverParseAll";
    }

}
